library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0       ✔ purrr   0.3.0  
## ✔ tibble  2.0.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.2       ✔ stringr 1.4.0  
## ✔ readr   1.3.1       ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(tseries) #2
library(forecast)#2
library(readxl)#2

library(sf) #3
## Linking to GEOS 3.6.1, GDAL 2.1.3, PROJ 4.9.3
library(leaflet) #3

library(ggplot2)#4

Task 1

Task 2

truckee_flow <- read_csv("truckee_flow.csv") #read in data
#remove unnecessary columns
truckee_df <- truckee_flow %>%
  select(year_nu, mean_va) %>% 
  rename(year = year_nu, cfs=mean_va)

Part A

Convert to ‘df_flow’ to time-series data

truckee_ts <- ts(truckee_df$cfs, frequency = 12, start = c(2000,1))

Plot ‘tru_ts’

plot(truckee_ts)

Decompose the plot for further data exploration

truckee_dc <- decompose(truckee_ts)

plot(truckee_dc)

The time-series data for streamflow in the Truckee River displays a seasonal pattern. Streamflows are lower in the beginning of each year and steadily increase until they peak a few months later, then decreases throughout the rest of the year with minor fluctuations. The peaks in each year tend to vary and is likely due to variations in snowpack and other precipitation events.

Part B

Holt-Winters

truckee_hw <-HoltWinters(truckee_ts) #Computes Holt-Winters Filtering of a given time series. Unknown parameters are determined by minimizing the squared prediction error.

#tru_hw # gives more weight to more recent observations

plot(truckee_hw)

5-year forecast of Truckee River flow

truckee_fc <- forecast(truckee_hw, h = 60) # 5 year forecast... h = 60 months or 5 years

plot(truckee_fc)

Part C

Visualize model residuals

hist(truckee_fc$residuals)# residuals look normally distributed so model is a good fit... aka not over/under estimating

#Task 3

nps<-read_sf(dsn = ".", layer = "nps_boundary") #read in nps data
nps_ca<-nps %>% 
  filter(STATE=="CA") #filter for state

nps_ca$UNIT_TYPE<-as.factor(nps_ca$UNIT_TYPE) #convert park type to factor

factpal <- colorFactor(topo.colors(8), nps_ca$UNIT_TYPE) #choosing palette for polygon golors.
#I chose to use the park type as the color, however this can be by individual park, or whatever you choose 
nps_ca_map <-nps_ca %>% 
                  leaflet() %>% 
                  addTiles() %>% 
                  addPolygons(fillColor = ~factpal(UNIT_TYPE), #color of each polygon
                              fillOpacity = 1, #the opacity of the polygons
                              stroke = TRUE, #whether to outline the park boundaries,
                              color = "black",#color of polygon boundaries 
                              weight = 0.2, #stroke width in pixels
                              opacity = 1, #stroke opacity 
                              label = nps_ca$PARKNAME, #label of park when you hover over it
                              highlightOptions = highlightOptions(color = "white", weight = 5, bringToFront = TRUE),
                              labelOptions = labelOptions(style = list("font-weight" = "normal", padding = "3px 8px"),
                                                          textsize = "15px",direction = "auto")) %>% #how the park name shows up when you hover over it

                  addLegend("bottomright", pal = factpal, values = ~UNIT_TYPE,
                            title = "Park Classification", 
                            opacity = 1)
                              
nps_ca_map

Task 4

lizard<-read_csv("lter_lizard_pitfall.csv")
## Warning: Missing column names filled in: 'X2' [2], 'X3' [3], 'X4' [4],
## 'X5' [5], 'X6' [6], 'X7' [7], 'X8' [8], 'X9' [9], 'X10' [10], 'X11' [11],
## 'X12' [12], 'X13' [13], 'X14' [14]
## Parsed with column specification:
## cols(
##   `File Name: JornadaStudy_007_npp_lizard_pitfall_trap_data.csv     Last Update: 01/08/2013   By: John Anderson` = col_character(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_character(),
##   X5 = col_character(),
##   X6 = col_character(),
##   X7 = col_character(),
##   X8 = col_character(),
##   X9 = col_character(),
##   X10 = col_character(),
##   X11 = col_character(),
##   X12 = col_character(),
##   X13 = col_character(),
##   X14 = col_character()
## )
#data wrangling
colnames(lizard) = lizard[47, ] #assigning column names
lizard<-lizard[-c(1:47), ] #taking out unnecessary rows

lizard_df<-lizard %>% 
  filter(rcap!="R") %>% #taking out repeat individuals
  filter(site=="CALI")#taking out unnecessary sites
#male vs female weight (g)
lizard_weight<-lizard_df %>% 
  filter(sex=="F"|sex=="M") %>% 
  filter(weight!= ".") %>% 
  #group_by(sex) %>% 
  mutate(weight=as.numeric(weight)) %>% 
  mutate(sex=as.factor(sex))

library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact
mu <- ddply(lizard_weight, "sex", summarise, grp.mean=mean(weight))

ggplot(lizard_weight, aes(x=weight, color=sex))+
  geom_histogram(fill="white", alpha=0.5, position="identity")+
    geom_vline(data=mu, aes(xintercept=grp.mean, color=sex),
             linetype="dashed")+
  theme(legend.position="top")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

with(lizard_weight, shapiro.test(weight[sex == "F"]))
## 
##  Shapiro-Wilk normality test
## 
## data:  weight[sex == "F"]
## W = 0.58543, p-value = 7.423e-12
with(lizard_weight, shapiro.test(weight[sex == "M"]))
## 
##  Shapiro-Wilk normality test
## 
## data:  weight[sex == "M"]
## W = 0.58315, p-value = 8.465e-11
#res.ftest <- var.test(weight ~ group, data = lizard_weight)
#res.ftest